# +~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~ #  
#
#' @title  Create figures and tables for main paper
#' @author Hauke Licht
#
# +~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~+~ #

# setup ----

library(readr)
library(dplyr)
library(tidyr)
library(purrr)
library(plm)
library(lmtest)
library(ggplot2)
library(patchwork)
library(grid)

# Project-relative locations; everything resolves against the working directory.
base_path <- "."
data_path <- file.path(base_path, "data", "output")
helpers_path <- file.path(base_path, "code", "helpers")

# Plotting helpers (presumably where save_plot() comes from -- it is not
# defined in this file), followed by the output folder for figures.
source(file.path(helpers_path, "plot_setup.R"))
fig_path <- file.path(base_path, "results", "figures")
dir.create(path = fig_path, recursive = TRUE, showWarnings = FALSE)

# Table helpers (presumably quick_kable() / write_kable()), followed by the
# output folder for tables.
source(file.path(helpers_path, "table_setup.R"))
tables_path <- file.path(base_path, "results", "tables")
dir.create(path = tables_path, recursive = TRUE, showWarnings = FALSE)

# Pre-computed numbers, tables, figures and model fits reported in the paper.
paper_objects <- read_rds(file = file.path(data_path, "paper_objects.rds"))

# section 3.2 ----

# Time range covered by the parliamentary party tweets data
# (first and last tweet date as stored in `paper_objects`)
paper_objects$desc$tweets$first_tweet_date
paper_objects$desc$tweets$last_tweet_date

## section 3.2.1 ----

# No. of tweets in the dataset: sum over the elements of `countries_nobs`
# (presumably one count per country -- confirm against the upstream script)
sum(paper_objects$desc$tweets$countries_nobs)

# No. of parties in the dataset
paper_objects$desc$tweets$n_parties

## section 3.2.3 ----

# No. of tweets in the first coding round (element named "1")
paper_objects$desc$coding$n_tweets["1"]

# No. of tweets in the second coding round (element named "2")
paper_objects$desc$coding$n_tweets["2"]

# Total no. of coded tweets (element named "total")
paper_objects$desc$coding$n_tweets["total"]

## section 3.2.4 ----

# Krippendorff’s α in the raw and cleaned codings data ("pooled" element)
paper_objects$desc$coding$alpha_baseline["pooled"]

# Inter-method label agreement: share of observations for which the
# model-induced label (`labeling`) equals the majority-vote label.
labeling_comp <- paper_objects$misc$em_fit_pooled$est_class_probs %>%
  count(labeling, majority_vote) %>%
  pivot_wider(
    names_from = "majority_vote",
    values_from = "n",
    # label combinations that never occur must count as 0; leaving them NA
    # would turn the agreement ratio below into NA
    values_fill = 0L
  )
labeling_comp_tab <- as.matrix(labeling_comp[, -1])
# Name rows by the labels they actually represent, so that agreement cells are
# looked up by label instead of relying on rows and columns sharing one order.
rownames(labeling_comp_tab) <- as.character(labeling_comp[["labeling"]])
shared_labels <- intersect(
  rownames(labeling_comp_tab),
  colnames(labeling_comp_tab)
)
# cells where both methods assign the same label, over all observations
sum(labeling_comp_tab[cbind(shared_labels, shared_labels)]) /
  sum(labeling_comp_tab)

# Label counts and proportions in the model-induced labels
paper_objects$misc$em_fit_pooled$est_class_probs %>%
  count(labeling) %>%
  mutate(prop = n / sum(n))

## section 3.2.5 ----

# Classifier results (Table 1): one row per text representation / learning
# strategy, written to `tables_path` as "table01".
cap <- "Out-of-sample predictive performance of ``best'' classifiers trained with different text representation and learning strategies."

tab <- paper_objects$tables$best_classifiers_performances %>%
  transmute(
    # display name of each approach, keyed on its feature representation
    `Model` = case_when(
      features == "XLM-T" ~ "XLM-T classifier",
      features == "XLM-R embeddings" ~ "MSEs (w/ linear classifier)",
      features == "Bag-of-words" ~ "MT+BoW (w/ linear classifier)"
    ),
    `$F1_{\\mbox{macro}}$` = macro,
    `$F1_{\\mbox{micro}}$` = micro,
    Precision = precision,
    Recall = recall,
    Specificity = specificity
  ) %>%
  # column names contain LaTeX math, hence no escaping
  quick_kable(caption = cap, label = "classifier_results", escape = FALSE) %>%
  # first row is set in bold
  row_spec(1, bold = TRUE) %>%
  add_header_above(
    c(" " = 1, "Overall" = 2, "General elite criticism class" = 3),
    italic = TRUE
  ) %>%
  write_kable(
    .file.name = "table01",
    dir = tables_path,
    overwrite = TRUE,
    position = "!t"
  )

## section 3.2.6 ----

# Share of anti-elite tweets among political tweets
paper_objects$tables$elite_criticism_X_political
# note: see first row (political == "yes"), column "pct_yes"

# section 3.3 (Validation) ----

## party averages plot (Figure 2) ----

# Figure caption (LaTeX). The note is introduced with \emph{Note:}, matching
# the Figure 4 caption; \label{Note:} would define a cross-reference label and
# print nothing.
cap <- paste(
  "Estimates of parties' anti-elite strategies."
  , "Points indicate the mean predicted probability of ``general'' elite criticism across a party's tweets."
  , "Coloring distinguishes between populist (orange) and non-populist parties (green)."
  , "\\emph{Note:}"
  , "For presentational purposes, we have left out regional parties in Spain and some other minor parties."
  , sep = " "
)

save_plot(
  plot = paper_objects$figures$results$party_plots
  , path = fig_path
  , filename = "figure02"
  , devices = c("png", "eps", "tiff")
  , width = 6.8
  , height = 8.5
)

# difference in means between populist and non-populist parties:
# t statistic and p value of the second term in the tidied regression output
m <- broom::tidy(paper_objects$misc$regressions$dim_populist_party_means)
m$statistic[2]; m$p.value[2]

## plots of quarterly estimates for DE, UK, and SE (Figure 3) ----

# Figure caption: sentences are kept as separate elements and joined with
# single spaces.
cap <- paste(
  c(
    "Quarterly estimates of German, British, and Swedish parties' anti-elite strategies. ",
    "One square plotted per party-quarter estimate.",
    "Coloring of squares indicates tweets' mean predicted probability of ``general'' elite criticism for a party--quarter unit.",
    "Lighter values correspond to higher values.",
    "Squares left blank (white) mark periods for which we did not include a given party in our dataset.",
    "Grey-shaded squares mark party-quarters for which no tweets are available to compute an estimate, although we included this party in our dataset for this quarter.",
    "Within country panels, parties are sorted according to their overall ideology from left (top) to right (bottom)."
  ),
  collapse = " "
)

# NOTE: "paryt_quarter_selected" is the element name as stored in
# `paper_objects`; it must match the upstream object, so it is kept as is.
save_plot(
  plot = paper_objects$figures$results$paryt_quarter_selected,
  path = fig_path,
  filename = "figure03",
  devices = c("png", "eps", "tiff"),
  width = 5.5,
  height = 4
)

## correlation with CHES indicators  (Figure 4) ----

# Figure caption: fragments are joined with single spaces.
cap <- paste(
  c(
    "Estimates of parties' anti-elite strategies plotted against",
    "Chapel Hill Expert Survey (CHES) anti-elite salience indicators.",
    "Plot panel columns indicate CHES waves.",
    "Estimates of parties' anti-elite strategies were obtained by aggregating",
    "parties' tweets posted in the 12 months before the field end date of a given CHES wave.",
    "\\emph{Note:} ",
    "We have omitted parties with less than 100 tweets in these date ranges."
  ),
  collapse = " "
)

# written as PNG and TIFF only (no EPS for this figure)
save_plot(
  plot = paper_objects$figures$validate$ches_correlations_party_averages,
  path = fig_path,
  filename = "figure04",
  devices = c("png", "tiff"),
  width = 5.5,
  height = 3
)

# Section 4 (Analyses) ----

## Plot elite criticism by party type and country (Figure 5) ----

# Figure caption: sentences are joined with single spaces.
cap <- paste(
  c(
    "Distribution of quarterly estimates of parties' anti-elite strategies by party type and country.",
    "Party type determined for the parliamentary configuration that was ongoing in a respective quarter in a given country.",
    "Data points outside of 1.5$\\times$inter-quartile ranges not plotted."
  ),
  collapse = " "
)

# stored plot with its y-axis title replaced
p <- paper_objects$figures$results$elitecriticism_party_type_by_country +
  labs(y = "Anti-elite strategy\n(party-quarter estimates)")

save_plot(
  plot = p,
  path = fig_path,
  filename = "figure05",
  devices = c("png", "eps", "tiff"),
  width = 6,
  height = 3
)

##  Effect of polls on anti-elite strategies for challenger vs. mainstream parties (Figure 6) ----

# helper function to create effect plots with inset distribution of the predictor variable

#' Plot simulated predictions of a panel model against one predictor
#'
#' Draws coefficient vectors from a multivariate normal centred on the model's
#' estimates (covariance from `vcovBK(model, cluster = "time")`), computes
#' predictions over an integer grid of predictor values while holding the
#' lagged dependent variable at its sample mean, and plots the median
#' prediction with a 95% simulation interval. A histogram of the observed
#' predictor values is inset at the bottom of the panel.
#'
#' The prediction only uses the coefficients of the lagged DV and of
#' `pred.var`, so the model must have exactly these two coefficients.
#'
#' @param model Fitted model supporting `coef()`, `fixef()` and `vcovBK()`,
#'   with its model frame in `model$model`.
#' @param pred.var Name of the predictor column in `model$model`
#'   (e.g. `"lag(spolls_mean)"`).
#' @param x.range Optional length-2 numeric giving the range of predictor
#'   values to plot; defaults to the observed range of `pred.var`. Limits are
#'   widened to whole numbers and evaluated in steps of 1.
#' @param ylab,xlab Optional axis titles.
#' @param lagged.dv Name of the lagged dependent variable column in
#'   `model$model`.
#' @param n.sims Number of simulated coefficient draws.
#' @return The effect plot with the inset histogram (ggplot2/patchwork object).
generate_plm_margins_plot <- function(
    model,
    pred.var,
    x.range = NULL,
    ylab = NULL,
    xlab = NULL,
    lagged.dv = "lag(mean_prob_elitecriticism)",
    n.sims = 1000
) {

  # the prediction below combines exactly two terms; fail early and clearly
  # instead of with a non-conformable-matrix error or silent NA predictions
  beta <- coef(model)
  if (length(beta) != 2L) {
    stop(
      "`model` must have exactly two coefficients (lagged DV and `pred.var`), not ",
      length(beta), ".",
      call. = FALSE
    )
  }
  if (!lagged.dv %in% names(model$model)) {
    stop("Column '", lagged.dv, "' not found in `model$model`.", call. = FALSE)
  }

  # integer grid of predictor values covering the requested (or observed) range
  grid_limits <- if (is.null(x.range)) {
    range(model$model[[pred.var]], na.rm = TRUE)
  } else {
    x.range
  }
  x_range <- seq(floor(grid_limits[1]), ceiling(grid_limits[2]))

  # histogram of the observed predictor values (shown as inset below)
  p_obs <- ggplot(
    data = setNames(as.data.frame(model$model[pred.var]), "predictor"),
    mapping = aes(x = predictor)
  ) +
    geom_histogram(fill = "lightgrey") +
    scale_y_continuous(name = NULL, breaks = NULL) +
    coord_cartesian(xlim = range(x_range)) +
    theme(
      # remove/shrink everything surrounding the plot panel
      axis.title = element_blank()
      , axis.text = element_blank()
      , axis.ticks = element_blank()
      , plot.margin = margin()
      # remove grid lines
      , panel.grid.major.y = element_blank()
      , panel.grid.major.x = element_blank()
      # misc
      , plot.background = element_blank()
      , panel.border = element_blank()
    )

  # mean of the fixed effects returned by fixef(), used as intercept
  mean_fixef <- mean(fixef(model))

  # simulate coefficient values using the panel-corrected VCOV matrix
  sims <- MASS::mvrnorm(n.sims, beta, vcovBK(model, cluster = "time"))
  # mvrnorm() drops to a plain vector for a single draw; keep a draws x 2 matrix
  sims <- matrix(
    sims,
    nrow = n.sims,
    ncol = length(beta),
    dimnames = list(NULL, names(beta))
  )
  # put the draws in (lagged DV, predictor) order by name so the design matrix
  # below lines up regardless of the order of terms in the model formula;
  # falls back to the positional order if the names are not available
  coef_order <- c(lagged.dv, pred.var)
  if (all(coef_order %in% colnames(sims))) {
    sims <- sims[, coef_order, drop = FALSE]
  }

  # lagged DV is held at its sample mean
  mean_lagged_dv <- mean(as.vector(model$model[[lagged.dv]]))

  # predicted values: one row per grid value, one column per simulated draw
  ypred <- mean_fixef + cbind(mean_lagged_dv, x_range) %*% t(sims)

  # median prediction and 95% simulation interval per grid value
  preds <- data.frame(
    polls = x_range,
    y = apply(ypred, 1, median),
    ymin = apply(ypred, 1, quantile, probs = 0.025, names = FALSE),
    ymax = apply(ypred, 1, quantile, probs = 0.975, names = FALSE)
  )

  p <- ggplot(preds, aes(x = polls, y = y)) +
    geom_ribbon(aes(ymin = ymin, ymax = ymax), fill = "lightgrey", alpha = .5) +
    # straight line fitted through the simulated medians
    geom_smooth(method = "lm", se = FALSE, color = "black", linewidth = 1) +
    coord_cartesian(xlim = range(x_range))

  if (!is.null(xlab)) {
    p <- p + xlab(xlab)
  }
  if (!is.null(ylab)) {
    p <- p + ylab(ylab)
  }

  p +
    theme(
      # transparent backgrounds so the inset histogram (drawn underneath,
      # on_top = FALSE) stays visible
      plot.background = element_rect(fill = "transparent")
      , panel.background = element_rect(fill = "transparent")
    ) +
    # inspired by https://stackoverflow.com/a/68769275
    inset_element(p_obs, left = 0.0, bottom = .01, right = 1.0, top = .15, align_to = "panel", on_top = FALSE)
}


# panel (a): effect of lagged polling average for mainstream parties
p_a <- generate_plm_margins_plot(
  model = paper_objects$misc$regressions$elitecriticism_polls$mainstream,
  pred.var = "lag(spolls_mean)",
  ylab = "Predicted anti-elite strategy",
  xlab = "Polling average (t-1)"
)

save_plot(
  plot = p_a
  , path = fig_path
  , filename = "figure06a"
  , devices = c("png", "tiff")
  , width = 3
  , height = 2.3
)

# panel (b): effect of lagged polling average for challenger parties
p_b <- generate_plm_margins_plot(
  model = paper_objects$misc$regressions$elitecriticism_polls$challenger,
  pred.var = "lag(spolls_mean)",
  ylab = "Predicted anti-elite strategy",
  xlab = "Polling average (t-1)"
)

save_plot(
  plot = p_b
  , path = fig_path
  , filename = "figure06b"
  , devices = c("png", "tiff")
  , width = 3
  , height = 2.3
)

# combined: each panel sits above its text label (column-wise fill, label row
# is 1/7 the height of the plot row)
p <- (
  p_a + textGrob("(a) Mainstream parties", gp = gpar(fontsize = 10), hjust = 0.5, vjust = 1)
  +
  p_b + textGrob("(b) Challenger parties", gp = gpar(fontsize = 10), hjust = 0.5, vjust = 1)
) +
  plot_layout(byrow = FALSE, heights = c(7, 1))

save_plot(
  plot = p
  , path = fig_path
  , filename = "figure06"
  , devices = c("png", "tiff")
  , width = 6
  , height = 2.6
)

##  Effect of CIP on anti-elite strategies for challenger vs. mainstream parties (Figure 7) ----

# panel (a): effect of lagged coalition inclusion probability (CIP) for
# mainstream parties
p_a <- generate_plm_margins_plot(
  model = paper_objects$misc$regressions$elitecriticism_cip$mainstream,
  pred.var = "lag(cip_mean)",
  ylab = "Predicted anti-elite strategy",
  xlab = "Coalition Inclusion Probability (t-1)"
)

save_plot(
  plot = p_a
  , path = fig_path
  , filename = "figure07a"
  , devices = c("png", "tiff")
  , width = 3
  , height = 2.3
)

# panel (b): effect of lagged CIP for challenger parties
p_b <- generate_plm_margins_plot(
  model = paper_objects$misc$regressions$elitecriticism_cip$challenger,
  pred.var = "lag(cip_mean)",
  ylab = "Predicted anti-elite strategy",
  xlab = "Coalition Inclusion Probability (t-1)"
)

save_plot(
  plot = p_b
  , path = fig_path
  , filename = "figure07b"
  , devices = c("png", "tiff")
  , width = 3
  , height = 2.3
)

# combined: each panel sits above its text label (column-wise fill, label row
# is 1/7 the height of the plot row)
p <- (
  p_a + textGrob("(a) Mainstream parties", gp = gpar(fontsize = 10), hjust = 0.5, vjust = 1)
  +
    p_b + textGrob("(b) Challenger parties", gp = gpar(fontsize = 10), hjust = 0.5, vjust = 1)
) +
  plot_layout(byrow = FALSE, heights = c(7, 1))

save_plot(
  plot = p
  , path = fig_path
  , filename = "figure07"
  , devices = c("png", "tiff")
  , width = 6
  , height = 2.6
)



